This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# library for LMM
library(lme4)
library(lmerTest)
library(car)
Loading required package: carData
df<-read.csv("input/scores_commits.csv", header =TRUE, sep=",")
df <- df[complete.cases(df), ] # Apply complete.cases function
df
# convert to nominal factor
df$Group = factor(df$Group)
df$phase = factor(df$phase)
df$log_novelty <- log(df$novelty+1)
df$log_user_requirement <- log(df$user.requirement+1)
df$log_infovis <- log(df$infovis+1)
df$log_total <- log(df$total+1)
df$log_count <- log(df$count+1)
df$Q7_Q7_1 <- log(df$Q7_Q7_1+1)
df$Q7_Q7_2 <- log(df$Q7_Q7_2+1)
df$Q8_Q8_1 <- log(df$Q8_Q8_1+1)
df$Q10 <- log(df$Q10+1)
# standardizing variables for skills and aspirations.
cols <- c("Q7_Q7_1", "Q7_Q7_2", "Q8_Q8_1", "Q10", "log_novelty", "log_user_requirement", "log_infovis", "log_total", "log_count")
df[cols] <- scale(df[cols])
df
mod.reduce.novelty <- lm( log_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
summary(mod.reduce.novelty)
Call:
lm(formula = log_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-1.8291 -0.8409 0.1955 0.8140 1.7601
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.457e-15 3.280e-02 0.000 1.00000
log_count 2.970e-01 3.313e-02 8.964 < 2e-16 ***
Q7_Q7_1 -1.967e-01 4.120e-02 -4.775 2.14e-06 ***
Q7_Q7_2 1.805e-01 4.223e-02 4.275 2.14e-05 ***
Q8_Q8_1 3.738e-03 3.627e-02 0.103 0.91795
Q10 1.034e-01 3.539e-02 2.921 0.00359 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9276 on 794 degrees of freedom
Multiple R-squared: 0.1449, Adjusted R-squared: 0.1395
F-statistic: 26.91 on 5 and 794 DF, p-value: < 2.2e-16
AIC(mod.reduce.novelty)
[1] 2158.067
BIC(mod.reduce.novelty)
[1] 2190.859
mod.full.novelty <- lm( log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
summary(mod.full.novelty)
Call:
lm(formula = log_novelty ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-1.8606 -0.8544 0.1794 0.8162 1.8526
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.129451 0.068285 -1.896 0.05836 .
factor(Group)1 0.237230 0.094751 2.504 0.01249 *
factor(Group)2 0.168224 0.094639 1.778 0.07586 .
factor(Group)3 0.099722 0.094145 1.059 0.28981
log_count 0.292623 0.033134 8.832 < 2e-16 ***
Q7_Q7_1 -0.199272 0.041287 -4.826 1.67e-06 ***
Q7_Q7_2 0.178203 0.042382 4.205 2.91e-05 ***
Q8_Q8_1 0.001541 0.036257 0.043 0.96611
Q10 0.099003 0.035891 2.758 0.00594 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9254 on 791 degrees of freedom
Multiple R-squared: 0.1523, Adjusted R-squared: 0.1437
F-statistic: 17.76 on 8 and 791 DF, p-value: < 2.2e-16
AIC(mod.full.novelty)
[1] 2157.128
BIC(mod.full.novelty)
[1] 2203.974
anova(mod.reduce.novelty, mod.full.novelty)
Analysis of Variance Table
Model 1: log_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10
Model 2: log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 794 683.22
2 791 677.32 3 5.9001 2.2968 0.07633 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
Loading required package: leaps
Loading required package: SuppDists
step(lm(log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data=df),
method="both", trace = 1 )
Start: AIC=-115.17
log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10
Df Sum of Sq RSS AIC
- Q8_Q8_1 1 0.002 677.32 -117.172
<none> 677.32 -115.173
- factor(Group) 3 5.900 683.22 -114.235
- Q10 1 6.515 683.84 -109.515
- Q7_Q7_2 1 15.138 692.46 -99.490
- Q7_Q7_1 1 19.947 697.27 -93.954
- log_count 1 66.787 744.11 -41.940
Step: AIC=-117.17
log_novelty ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q10
Df Sum of Sq RSS AIC
<none> 677.32 -117.172
- factor(Group) 3 5.908 683.23 -116.224
- Q10 1 7.106 684.43 -110.823
- Q7_Q7_2 1 15.596 692.92 -100.960
- Q7_Q7_1 1 20.066 697.39 -95.815
- log_count 1 67.067 744.39 -43.638
Call:
lm(formula = log_novelty ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q10, data = df)
Coefficients:
(Intercept) factor(Group)1 factor(Group)2 factor(Group)3 log_count Q7_Q7_1 Q7_Q7_2
-0.12958 0.23732 0.16843 0.09995 0.29271 -0.19912 0.17849
Q10
0.09942
mod.reduce.ur <- lm(log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(mod.reduce.ur)
Call:
lm(formula = log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-1.9234 -0.9854 0.3754 0.7441 1.6632
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.509e-16 3.348e-02 0.000 1.000000
log_count 2.476e-01 3.383e-02 7.319 6.11e-13 ***
Q7_Q7_1 -1.897e-01 4.207e-02 -4.509 7.51e-06 ***
Q7_Q7_2 1.157e-01 4.311e-02 2.685 0.007412 **
Q8_Q8_1 -4.640e-03 3.703e-02 -0.125 0.900327
Q10 1.194e-01 3.613e-02 3.305 0.000993 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9471 on 794 degrees of freedom
Multiple R-squared: 0.1086, Adjusted R-squared: 0.103
F-statistic: 19.36 on 5 and 794 DF, p-value: < 2.2e-16
AIC(mod.reduce.ur)
[1] 2191.288
BIC(mod.reduce.ur)
[1] 2224.081
mod.full.ur <- lm(log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(mod.full.ur)
Call:
lm(formula = log_user_requirement ~ factor(Group) + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.0497 -0.9697 0.3109 0.7168 1.7587
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.20771 0.06930 -2.997 0.002809 **
factor(Group)1 0.30843 0.09616 3.207 0.001393 **
factor(Group)2 0.15234 0.09604 1.586 0.113097
factor(Group)3 0.34981 0.09554 3.661 0.000268 ***
log_count 0.23922 0.03363 7.114 2.53e-12 ***
Q7_Q7_1 -0.19761 0.04190 -4.716 2.84e-06 ***
Q7_Q7_2 0.11979 0.04301 2.785 0.005482 **
Q8_Q8_1 -0.01073 0.03680 -0.292 0.770735
Q10 0.11094 0.03642 3.046 0.002398 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9391 on 791 degrees of freedom
Multiple R-squared: 0.1269, Adjusted R-squared: 0.1181
F-statistic: 14.37 on 8 and 791 DF, p-value: < 2.2e-16
AIC(mod.full.ur)
[1] 2180.715
BIC(mod.full.ur)
[1] 2227.561
anova(mod.reduce.ur, mod.full.ur)
Analysis of Variance Table
Model 1: log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10
Model 2: log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 794 712.19
2 791 697.59 3 14.602 5.5192 0.0009401 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
step(lm(log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data=df),
method="both", trace = 1 )
Start: AIC=-91.59
log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10
Df Sum of Sq RSS AIC
- Q8_Q8_1 1 0.075 697.66 -93.500
<none> 697.59 -91.586
- Q7_Q7_2 1 6.840 704.43 -85.780
- Q10 1 8.181 705.77 -84.258
- factor(Group) 3 14.602 712.19 -81.013
- Q7_Q7_1 1 19.617 717.20 -71.400
- log_count 1 44.633 742.22 -43.971
Step: AIC=-93.5
log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q10
Df Sum of Sq RSS AIC
<none> 697.66 -93.500
- Q7_Q7_2 1 6.788 704.45 -87.755
- Q10 1 8.389 706.05 -85.938
- factor(Group) 3 14.541 712.20 -82.997
- Q7_Q7_1 1 19.975 717.64 -72.917
- log_count 1 44.575 742.24 -45.954
Call:
lm(formula = log_user_requirement ~ factor(Group) + log_count +
Q7_Q7_1 + Q7_Q7_2 + Q10, data = df)
Coefficients:
(Intercept) factor(Group)1 factor(Group)2 factor(Group)3 log_count Q7_Q7_1 Q7_Q7_2
-0.2068 0.3078 0.1509 0.3482 0.2386 -0.1987 0.1178
Q10
0.1080
mod.reduce.vis <- lmer( log_infovis ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | Student) + ( 1 | phase) , data = df, REML = FALSE)
summary(mod.reduce.vis)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_infovis ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
1686.8 1729.0 -834.4 1668.8 791
Scaled residuals:
Min 1Q Median 3Q Max
-4.3459 -0.2711 0.0568 0.4528 3.7219
Random effects:
Groups Name Variance Std.Dev.
Student (Intercept) 0.56556 0.7520
phase (Intercept) 0.04482 0.2117
Residual 0.28839 0.5370
Number of obs: 800, groups: Student, 159; phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.004387 0.113506 9.331211 -0.039 0.969986
log_count 0.101909 0.026572 738.704420 3.835 0.000136 ***
Q7_Q7_1 -0.207401 0.078496 155.246118 -2.642 0.009080 **
Q7_Q7_2 0.220425 0.080348 153.994778 2.743 0.006803 **
Q8_Q8_1 -0.063161 0.068976 153.572132 -0.916 0.361259
Q10 0.113008 0.067332 153.554567 1.678 0.095310 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
log_count 0.002
Q7_Q7_1 0.005 0.043
Q7_Q7_2 -0.002 -0.023 -0.556
Q8_Q8_1 0.002 -0.024 -0.075 -0.172
Q10 0.002 -0.025 -0.009 -0.126 -0.275
AIC(mod.reduce.vis)
[1] 1686.82
BIC(mod.reduce.vis)
[1] 1728.981
mod.full.vis <- lmer( log_infovis ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | Student) + ( 1 | phase) , data = df, REML = FALSE)
summary(mod.full.vis)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_infovis ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
1686.0 1742.3 -831.0 1662.0 788
Scaled residuals:
Min 1Q Median 3Q Max
-4.3644 -0.2699 0.0488 0.4521 3.7079
Random effects:
Groups Name Variance Std.Dev.
Student (Intercept) 0.53930 0.7344
phase (Intercept) 0.04475 0.2115
Residual 0.28841 0.5370
Number of obs: 800, groups: Student, 159; phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.28898 0.15869 32.79179 -1.821 0.077738 .
factor(Group)1 0.39368 0.17736 153.34225 2.220 0.027905 *
factor(Group)2 0.30674 0.17669 153.35371 1.736 0.084557 .
factor(Group)3 0.41230 0.17566 153.43206 2.347 0.020195 *
log_count 0.10225 0.02653 741.52333 3.854 0.000126 ***
Q7_Q7_1 -0.21031 0.07709 155.17239 -2.728 0.007106 **
Q7_Q7_2 0.21722 0.07909 153.92204 2.746 0.006744 **
Q8_Q8_1 -0.07213 0.06766 153.47966 -1.066 0.288008
Q10 0.10932 0.06698 153.45530 1.632 0.104668
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) fc(G)1 fc(G)2 fc(G)3 lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
factr(Grp)1 -0.577
factr(Grp)2 -0.580 0.511
factr(Grp)3 -0.583 0.522 0.525
log_count 0.014 -0.023 -0.005 -0.018
Q7_Q7_1 0.001 -0.031 0.047 -0.009 0.045
Q7_Q7_2 0.026 0.001 -0.089 -0.010 -0.025 -0.559
Q8_Q8_1 0.038 -0.021 -0.051 -0.059 -0.024 -0.077 -0.167
Q10 -0.007 -0.081 0.095 0.015 -0.022 0.005 -0.140 -0.276
AIC(mod.full.vis)
[1] 1686.045
BIC(mod.full.vis)
[1] 1742.26
anova(mod.reduce.vis, mod.full.vis)
Data: df
Models:
mod.reduce.vis: log_infovis ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
mod.full.vis: log_infovis ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
mod.reduce.vis 9 1686.8 1729.0 -834.41 1668.8
mod.full.vis 12 1686.0 1742.3 -831.02 1662.0 6.7749 3 0.07943 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
step(lm(log_infovis ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data=df),
method="both", trace = 1 )
Start: AIC=-131.12
log_infovis ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10
Df Sum of Sq RSS AIC
<none> 663.96 -131.117
- Q8_Q8_1 1 4.428 668.38 -127.799
- Q10 1 6.615 670.57 -125.186
- Group 3 18.523 682.48 -115.104
- Q7_Q7_1 1 16.528 680.48 -113.446
- Q7_Q7_2 1 19.340 683.30 -110.147
- log_count 1 68.596 732.55 -54.463
Call:
lm(formula = log_infovis ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Coefficients:
(Intercept) Group1 Group2 Group3 log_count Q7_Q7_1 Q7_Q7_2 Q8_Q8_1 Q10
-0.27221 0.37203 0.30113 0.38910 0.29656 -0.18139 0.20142 -0.08245 0.09975
mod.reduce.total <- lm( log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(mod.reduce.total)
Call:
lm(formula = log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.1861 -0.1993 0.2443 0.5703 1.4738
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.422e-16 3.222e-02 0.000 1.000000
log_count 3.419e-01 3.255e-02 10.503 < 2e-16 ***
Q7_Q7_1 -1.852e-01 4.048e-02 -4.576 5.51e-06 ***
Q7_Q7_2 1.875e-01 4.148e-02 4.520 7.12e-06 ***
Q8_Q8_1 -8.656e-02 3.563e-02 -2.429 0.015349 *
Q10 1.241e-01 3.477e-02 3.570 0.000378 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9113 on 794 degrees of freedom
Multiple R-squared: 0.1748, Adjusted R-squared: 0.1696
F-statistic: 33.63 on 5 and 794 DF, p-value: < 2.2e-16
AIC(mod.reduce.total)
[1] 2129.645
BIC(mod.reduce.total)
[1] 2162.437
mod.full.total <- lm( log_total ~ Group + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(mod.full.total)
Call:
lm(formula = log_total ~ Group + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.9973 -0.1279 0.2773 0.5482 1.4041
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.29834 0.07061 -4.225 2.66e-05 ***
Group1 0.43251 0.09786 4.420 1.13e-05 ***
Group2 0.28093 0.09793 2.869 0.004231 **
Group3 0.45080 0.09734 4.631 4.25e-06 ***
Q7_Q7_1 -0.23066 0.04253 -5.424 7.76e-08 ***
Q7_Q7_2 0.20946 0.04380 4.783 2.06e-06 ***
Q8_Q8_1 -0.07307 0.03745 -1.951 0.051416 .
Q10 0.13970 0.03708 3.767 0.000177 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9576 on 792 degrees of freedom
Multiple R-squared: 0.09105, Adjusted R-squared: 0.08302
F-statistic: 11.33 on 7 and 792 DF, p-value: 9.643e-14
AIC(mod.full.total)
[1] 2210.925
BIC(mod.full.total)
[1] 2253.087
anova(mod.reduce.total, mod.full.total)
Analysis of Variance Table
Model 1: log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10
Model 2: log_total ~ Group + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 794 659.37
2 792 726.25 2 -66.874
# convert to nominal factor
df$Group = factor(df$Group)
df$phase = factor(df$phase)
library(plyr)
ddply(df, ~ Group * phase, function(data) summary(data$log_novelty) )
ddply(df, ~ Group * phase, summarise, log_novelty.mean=mean(log_novelty), log_novelty.sd = sd(log_novelty))
# histograms for two factors
boxplot(log_novelty ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_novelty")
with(df, interaction.plot(Group, phase, log_novelty, ylim=c(0, max(log_novelty)))) # interaction plot
# histograms for two factors
boxplot(log_novelty ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_user_requirement")
with(df, interaction.plot(Group, phase, log_user_requirement, ylim=c(0, max(log_user_requirement)))) # interaction plot
# histograms for two factors
boxplot(log_novelty ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_infovis")
with(df, interaction.plot(Group, phase, log_infovis, ylim=c(0, max(log_infovis)))) # interaction plot
# histograms for two factors
boxplot(log_novelty ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_total")
with(df, interaction.plot(Group, phase, log_total, ylim=c(0, max(log_total)))) # interaction plot
m = lmer(log_novelty ~ Group + (1|Student), data=df, REML=FALSE)
summary(m)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_novelty ~ Group + (1 | Student)
Data: df
AIC BIC logLik deviance df.resid
2046.0 2074.1 -1017.0 2034.0 794
Scaled residuals:
Min 1Q Median 3Q Max
-2.2669 -0.6407 0.1015 0.6356 2.3143
Random effects:
Groups Name Variance Std.Dev.
Student (Intercept) 0.4473 0.6688
Residual 0.5366 0.7325
Number of obs: 800, groups: Student, 159
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.1667 0.1224 159.4644 -1.361 0.175
Group1 0.2933 0.1698 159.0131 1.728 0.086 .
Group2 0.1952 0.1689 159.4644 1.156 0.249
Group3 0.1401 0.1689 159.4644 0.829 0.408
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) Group1 Group2
Group1 -0.721
Group2 -0.725 0.523
Group3 -0.725 0.523 0.526
plot(resid(m, type = "pearson") ~ fitted(m))
qqnorm(resid(m, type = "pearson"))
qqline(resid(m, type = "pearson"))
# library for LMM we will use on relational log_novelty
library(lme4)
library(lmerTest)
library(car)
contrasts(df$Group) <= "contr.sum"
1 2 3
0 TRUE TRUE TRUE
1 TRUE TRUE TRUE
2 TRUE TRUE TRUE
3 TRUE TRUE TRUE
contrasts(df$phase) <= "contr.sum"
2 3 4 5
1 TRUE TRUE TRUE TRUE
2 TRUE TRUE TRUE TRUE
3 TRUE TRUE TRUE TRUE
4 TRUE TRUE TRUE TRUE
5 TRUE TRUE TRUE TRUE
# phase is nested within group
fit <- lm(log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 , data = df)
summary(fit)
Call:
lm(formula = log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.1861 -0.1993 0.2443 0.5703 1.4738
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.422e-16 3.222e-02 0.000 1.000000
log_count 3.419e-01 3.255e-02 10.503 < 2e-16 ***
Q7_Q7_1 -1.852e-01 4.048e-02 -4.576 5.51e-06 ***
Q7_Q7_2 1.875e-01 4.148e-02 4.520 7.12e-06 ***
Q8_Q8_1 -8.656e-02 3.563e-02 -2.429 0.015349 *
Q10 1.241e-01 3.477e-02 3.570 0.000378 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9113 on 794 degrees of freedom
Multiple R-squared: 0.1748, Adjusted R-squared: 0.1696
F-statistic: 33.63 on 5 and 794 DF, p-value: < 2.2e-16
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
Loading required package: TH.data
Loading required package: MASS
Attaching package: ‘TH.data’
The following object is masked from ‘package:MASS’:
geyser
library(lsmeans)
Loading required package: emmeans
The 'lsmeans' package is now basically a front end for 'emmeans'.
Users are encouraged to switch the rest of the way.
See help('transition') for more information, including how to
convert old 'lsmeans' objects and scripts to work with 'emmeans'.
#summary(glht(fit, lsm(pairwise ~ roup / phase)), test = adjusted(type='holm'))
fit.full <- lm(log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)
summary(fit.full)
Call:
lm(formula = log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 +
Q8_Q8_1 + Q10, data = df)
Residuals:
Min 1Q Median 3Q Max
-3.1191 -0.2201 0.2313 0.5645 1.3650
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.27052 0.06637 -4.076 5.04e-05 ***
Group1 0.37292 0.09209 4.050 5.64e-05 ***
Group2 0.27040 0.09198 2.940 0.003380 **
Group3 0.41236 0.09150 4.507 7.58e-06 ***
log_count 0.33294 0.03220 10.339 < 2e-16 ***
Q7_Q7_1 -0.19094 0.04013 -4.758 2.32e-06 ***
Q7_Q7_2 0.18702 0.04119 4.540 6.49e-06 ***
Q8_Q8_1 -0.09491 0.03524 -2.693 0.007223 **
Q10 0.11965 0.03488 3.430 0.000635 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8994 on 791 degrees of freedom
Multiple R-squared: 0.1993, Adjusted R-squared: 0.1912
F-statistic: 24.61 on 8 and 791 DF, p-value: < 2.2e-16
anova(fit, fit.full)
Analysis of Variance Table
Model 1: log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10
Model 2: log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
Q10
Res.Df RSS Df Sum of Sq F Pr(>F)
1 794 659.37
2 791 639.79 3 19.585 8.0715 2.664e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# histograms for two factors
boxplot(log_total ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_total")
with(df, interaction.plot(Group, phase, log_total, ylim=c(0, max(log_total)))) # interaction plot
fit.lmer <- lmer(log_total ~ (1 | Group) + (1| phase:Group), data = df, REML= FALSE)
summary(fit.lmer)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_total ~ (1 | Group) + (1 | phase:Group)
Data: df
AIC BIC logLik deviance df.resid
2254.0 2272.7 -1123.0 2246.0 796
Scaled residuals:
Min 1Q Median 3Q Max
-2.94437 -0.01942 0.28792 0.61016 1.43336
Random effects:
Groups Name Variance Std.Dev.
phase:Group (Intercept) 0.03675 0.1917
Group (Intercept) 0.02060 0.1435
Residual 0.94281 0.9710
Number of obs: 800, groups: phase:Group, 20; Group, 4
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.006239 0.090377 3.950735 -0.069 0.948
fit.lmer <- lmer(log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | Student ) + ( 1 | phase), data = df, REML= FALSE)
summary(fit.lmer)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
1630.8 1687.0 -803.4 1606.8 788
Scaled residuals:
Min 1Q Median 3Q Max
-4.1260 -0.2616 0.0485 0.3829 4.0648
Random effects:
Groups Name Variance Std.Dev.
Student (Intercept) 0.56076 0.7488
phase (Intercept) 0.04637 0.2153
Residual 0.26263 0.5125
Number of obs: 800, groups: Student, 159; phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.29274 0.16109 32.72685 -1.817 0.07834 .
Group1 0.40329 0.17976 151.08485 2.243 0.02632 *
Group2 0.27793 0.17908 151.09189 1.552 0.12277
Group3 0.44309 0.17804 151.16138 2.489 0.01390 *
log_count 0.07472 0.02545 730.76115 2.936 0.00343 **
Q7_Q7_1 -0.22864 0.07811 153.04809 -2.927 0.00394 **
Q7_Q7_2 0.20767 0.08016 151.70100 2.591 0.01051 *
Q8_Q8_1 -0.08091 0.06857 151.20635 -1.180 0.23989
Q10 0.13263 0.06788 151.18265 1.954 0.05257 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) Group1 Group2 Group3 lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
Group1 -0.576
Group2 -0.579 0.511
Group3 -0.582 0.522 0.525
log_count 0.013 -0.022 -0.004 -0.017
Q7_Q7_1 0.001 -0.031 0.047 -0.009 0.043
Q7_Q7_2 0.026 0.000 -0.089 -0.010 -0.023 -0.559
Q8_Q8_1 0.038 -0.021 -0.051 -0.059 -0.023 -0.076 -0.167
Q10 -0.007 -0.081 0.095 0.015 -0.021 0.005 -0.140 -0.276
fit.lmer.reduced <- lmer(log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | Student) + ( 1 | phase), data = df, REML=FALSE)
summary(fit.lmer.reduced)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
1632.1 1674.2 -807.0 1614.1 791
Scaled residuals:
Min 1Q Median 3Q Max
-4.1299 -0.2523 0.0382 0.3871 4.0738
Random effects:
Groups Name Variance Std.Dev.
Student (Intercept) 0.59003 0.7681
phase (Intercept) 0.04643 0.2155
Residual 0.26258 0.5124
Number of obs: 800, groups: Student, 159; phase, 5
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.005204 0.115445 9.391843 -0.045 0.96499
log_count 0.073916 0.025489 727.808163 2.900 0.00385 **
Q7_Q7_1 -0.224039 0.079672 153.285562 -2.812 0.00557 **
Q7_Q7_2 0.208533 0.081574 151.923280 2.556 0.01156 *
Q8_Q8_1 -0.071758 0.070034 151.441097 -1.025 0.30718
Q10 0.138773 0.068365 151.424171 2.030 0.04412 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lg_cnt Q7_Q7_1 Q7_Q7_2 Q8_Q8_
log_count 0.002
Q7_Q7_1 0.005 0.041
Q7_Q7_2 -0.002 -0.022 -0.556
Q8_Q8_1 0.002 -0.023 -0.075 -0.172
Q10 0.002 -0.024 -0.009 -0.126 -0.275
anova(fit.lmer.reduced, fit.lmer)
Data: df
Models:
fit.lmer.reduced: log_total ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
fit.lmer: log_total ~ Group + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
fit.lmer.reduced 9 1632.1 1674.2 -807.04 1614.1
fit.lmer 12 1630.8 1687.0 -803.41 1606.8 7.2588 3 0.06409 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# phase is nested within group
fit.requirement.full <- lmer(log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | Student) + ( 1 | phase), data = df, REML = FALSE)
Anova(fit.requirement.full, type=3, test.statistics="F")
Analysis of Deviance Table (Type III Wald chisquare tests)
Response: log_user_requirement
Chisq Df Pr(>Chisq)
(Intercept) 1.8217 1 0.177116
factor(Group) 5.4876 3 0.139384
log_count 8.3467 1 0.003864 **
Q7_Q7_1 8.8566 1 0.002920 **
Q7_Q7_2 3.0003 1 0.083248 .
Q8_Q8_1 0.0042 1 0.948458
Q10 3.1256 1 0.077072 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
fit.requirement.full
Linear mixed model fit by maximum likelihood ['lmerModLmerTest']
Formula: log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 +
Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
1759.1002 1815.3156 -867.5501 1735.1002 788
Random effects:
Groups Name Std.Dev.
Student (Intercept) 0.7193
phase (Intercept) 0.2364
Residual 0.5700
Number of obs: 800, groups: Student, 159; phase, 5
Fixed Effects:
(Intercept) factor(Group)1 factor(Group)2 factor(Group)3 log_count Q7_Q7_1 Q7_Q7_2
-0.22184 0.31507 0.15640 0.36937 0.08078 -0.22668 0.13533
Q8_Q8_1 Q10
-0.00432 0.11695
# histograms for two factors
boxplot(log_user_requirement ~ Group * phase, data = df, xlab="Group.Phase", ylab="log_user_requirement")
with(df, interaction.plot(Group, phase, log_user_requirement, ylim=c(0, max(log_user_requirement)))) # interaction plot
# phase is nested within group
fit.requirement <- lmer(log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + ( 1 | Student) + ( 1 | phase), data = df, REML = FALSE)
Anova(fit, type=3, test.statistics="F")
Anova Table (Type III tests)
Response: log_total
Sum Sq Df F value Pr(>F)
(Intercept) 0.00 1 0.0000 1.000000
log_count 91.62 1 110.3216 < 2.2e-16 ***
Q7_Q7_1 17.39 1 20.9366 5.507e-06 ***
Q7_Q7_2 16.97 1 20.4327 7.117e-06 ***
Q8_Q8_1 4.90 1 5.9015 0.015349 *
Q10 10.59 1 12.7473 0.000378 ***
Residuals 659.37 794
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
fit.requirement
Linear mixed model fit by maximum likelihood ['lmerModLmerTest']
Formula: log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
Data: df
AIC BIC logLik deviance df.resid
1758.4979 1800.6594 -870.2489 1740.4979 791
Random effects:
Groups Name Std.Dev.
Student (Intercept) 0.7331
phase (Intercept) 0.2366
Residual 0.5700
Number of obs: 800, groups: Student, 159; phase, 5
Fixed Effects:
(Intercept) log_count Q7_Q7_1 Q7_Q7_2 Q8_Q8_1 Q10
-0.006829 0.081058 -0.220287 0.132015 0.002427 0.125682
plot(resid(m, type = "pearson") ~ fitted(m))
qqnorm(resid(m, type = "pearson"))
qqline(resid(m, type = "pearson"))
anova(fit.requirement, fit.requirement.full)
Data: df
Models:
fit.requirement: log_user_requirement ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
fit.requirement.full: log_user_requirement ~ factor(Group) + log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + (1 | Student) + (1 | phase)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
fit.requirement 9 1758.5 1800.7 -870.25 1740.5
fit.requirement.full 12 1759.1 1815.3 -867.55 1735.1 5.3976 3 0.1449